In [1]:
%reload_ext autoreload
%autoreload 2

import numpy as np

import sys
sys.path.append('..')

from helper import nn
from helper import logistic_regression as lr

from sklearn.metrics import classification_report

In [2]:
raw_X, raw_y = nn.load_data('ex3data1.mat')
print(raw_X.shape)
print(raw_y.shape)


(5000, 400)
(5000,)
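
The nn.load_data helper isn't shown in this notebook. A minimal sketch of what it plausibly does, assuming ex3data1.mat is a standard MATLAB file with 'X' and 'y' keys (the helper's actual signature and behavior may differ):

import scipy.io as sio

def load_data(path):
    # ex3data1.mat stores the images in 'X' (5000 x 400, each row a
    # flattened 20x20 grayscale image) and the labels in 'y'
    data = sio.loadmat(path)
    X = data['X']               # shape (5000, 400)
    y = data['y'].flatten()     # shape (5000,), values 1..10
    return X, y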

prepare data


In [3]:
# add intercept=1 for x0
X = np.insert(raw_X, 0, values=np.ones(raw_X.shape[0]), axis=1)
X.shape


Out[3]:
(5000, 401)
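
An equivalent way to prepend the bias column, just for illustration (the np.insert call above is what the notebook actually uses):

# same result: a column of ones stacked in front of the raw features
X_alt = np.hstack([np.ones((raw_X.shape[0], 1)), raw_X])
assert (X_alt == X).all()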

In [4]:
# y has 10 categories here: 1..10. Digit 0 is represented as category 10 because MATLAB indexing starts at 1.
# I'll move digit 0 back to index 0.
y_matrix = []

for k in range(1, 11):
    y_matrix.append((raw_y == k).astype(int))

# the last entry is k == 10, i.e. digit 0; bring it to the first position
y_matrix = [y_matrix[-1]] + y_matrix[:-1]
y = np.array(y_matrix)

y.shape


Out[4]:
(10, 5000)
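
The same label matrix can be built in one vectorized step via broadcasting; a sketch equivalent to the loop above (y_alt and classes are hypothetical names):

# classes ordered as [10, 1, 2, ..., 9] so digit 0 (category 10) lands in row 0
classes = np.array([10] + list(range(1, 10)))
y_alt = (raw_y == classes[:, None]).astype(int)   # (10, 1) vs (5000,) -> (10, 5000)
assert (y_alt == y).all()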

train 1 model


In [5]:
t0 = lr.logistic_regression(X, y[0])
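
The logistic_regression, sigmoid, and predict helpers live in helper/logistic_regression.py and aren't reproduced in this notebook. A minimal sketch of what they plausibly look like, assuming a regularized cross-entropy cost minimized with scipy.optimize.minimize (the function names, regularization default, and TNC solver choice are my assumptions, not the helper's confirmed API):

import scipy.optimize as opt

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def regularized_cost(theta, X, y, l=1):
    # cross-entropy cost; the intercept theta[0] is not regularized
    h = sigmoid(X @ theta)
    cost = -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))
    reg = (l / (2 * len(X))) * (theta[1:] ** 2).sum()
    return cost + reg

def regularized_gradient(theta, X, y, l=1):
    grad = X.T @ (sigmoid(X @ theta) - y) / len(X)
    grad[1:] += (l / len(X)) * theta[1:]
    return grad

def logistic_regression(X, y, l=1):
    theta = np.zeros(X.shape[1])
    res = opt.minimize(fun=regularized_cost, x0=theta, args=(X, y, l),
                       method='TNC', jac=regularized_gradient)
    return res.x

def predict(X, theta):
    # threshold the predicted probability at 0.5
    return (sigmoid(X @ theta) >= 0.5).astype(int)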

In [6]:
print(t0.shape)
y_pred = lr.predict(X, t0)
print('Accuracy={}'.format(np.mean(y[0] == y_pred)))


(401,)
Accuracy=0.9974

Is this accuracy real? Keep in mind it's measured on the training set, so it's likely optimistic.

train k models


In [7]:
k_theta = np.array([lr.logistic_regression(X, y[k]) for k in range(10)])
print(k_theta.shape)


(10, 401)

making predictions

  • think about the shape of k_theta: we are computing $X\times\theta^T$

    $(5000, 401) \times (10, 401).T = (5000, 10)$

  • then run sigmoid on the result to get probabilities; for each row, the class with the highest probability is the prediction

In [8]:
prob_matrix = lr.sigmoid(X @ k_theta.T)

In [9]:
np.set_printoptions(suppress=True)
prob_matrix


Out[9]:
array([[ 0.99577632,  0.        ,  0.00053528, ...,  0.00006469,
         0.00003912,  0.00172187],
       [ 0.99834614,  0.0000001 ,  0.00005607, ...,  0.00009684,
         0.0000029 ,  0.00008492],
       [ 0.99139772,  0.        ,  0.00056828, ...,  0.00000654,
         0.02653902,  0.00197393],
       ..., 
       [ 0.00000068,  0.04140121,  0.00320887, ...,  0.00012723,
         0.00297489,  0.70761228],
       [ 0.00001843,  0.00000013,  0.00000009, ...,  0.00164768,
         0.06814687,  0.86116757],
       [ 0.02879673,  0.        ,  0.00012974, ...,  0.36626836,
         0.00497918,  0.14821854]])

In [10]:
y_pred = np.argmax(prob_matrix, axis=1)

In [11]:
y_answer = raw_y.copy()
y_answer[y_answer==10] = 0

In [12]:
print(classification_report(y_answer, y_pred))


             precision    recall  f1-score   support

          0       0.97      0.99      0.98       500
          1       0.95      0.99      0.97       500
          2       0.95      0.92      0.93       500
          3       0.95      0.91      0.93       500
          4       0.95      0.95      0.95       500
          5       0.92      0.92      0.92       500
          6       0.97      0.98      0.97       500
          7       0.95      0.95      0.95       500
          8       0.93      0.92      0.92       500
          9       0.92      0.92      0.92       500

avg / total       0.94      0.94      0.94      5000
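
For a quick overall number, the multi-class accuracy can also be read directly from the predictions (a one-line sketch using the arrays defined above):

print('Overall accuracy={}'.format(np.mean(y_answer == y_pred)))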

